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Appendix A: Example of Server Executable Code In 
Recognition/Annotation Process 



5 

bl_annotate.pl

#!/usr/bin/perl -w

######
# Script to annotate the buylites
# Written by Henri
#
######

use strict;
use LWP::UserAgent;
use Apache::Registry;
use Time::HiRes qw(time);

########## pull in words from page and compare to hash tree ###########
my $url;                     # value of the ref= query parameter, still URL-escaped
my $url_unescape;            # $url with %XX escapes decoded; this is what gets fetched
my $textdata;                # HTTP response object first, then the page body (or '')
my @tempmatchingids = ();    # temp array to return longest matching word
my %finallist = ();          # hash that holds the final matching proper nouns as its keys
my $i = 0;
my $j = 0;
my $res = '';
my @words;
my $max_annotations = 15;    # steve - 5.24.01

# Start from the raw query string; fall back to '' so later string
# operations do not act on undef.
$url = defined $ENV{'QUERY_STRING'} ? $ENV{'QUERY_STRING'} : '';

# Keep only the value of the ref= parameter.  The previous substitution
# replaced the text from "ref=" onwards but left anything that preceded
# "&ref=" glued to the front of the URL; capturing the value avoids that.
# When there is no ref= parameter the query string is left as it was.
if ($url =~ /(?:^|&)ref=([^&]+)/) {
    $url = $1;
}

$url_unescape = $url;
$url_unescape =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;    # Unescape the stuff

############ Get the Page #########
# Need to use LWP::UserAgent instead of LWP::Simple because we need the
# timeout function.
# NOTE(review): the URL to fetch is taken verbatim from the request;
# confirm that something upstream restricts which hosts may be fetched.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);    # 10 second timeout

my $request = HTTP::Request->new('GET', $url_unescape);
$textdata = $ua->request($request);
if ($textdata->is_success) {
    $textdata = $textdata->content;
}
else {
    # Any failure is treated as an empty page, so nothing gets annotated.
    $textdata = '';
}

######### Done w/ the Page ########
my $aa = time;    # start of the timed recognition phase

# Strip markup whose text must never be annotated.  /s lets "." cross line
# breaks, so elements that span several lines are removed as a whole; the
# patterns previously only matched when the element sat on one line.

# $textdata =~ s/<IMG[^>]+>//ig;
$textdata =~ s/<a\b.+?<\/a>//gis;              # existing links (only <a>, not every tag starting with "a")
$textdata =~ s/<SCRIPT.+?<\/SCRIPT>//gis;
$textdata =~ s/<HEAD( |>).+?\/HEAD>//gis;

# $textdata =~ s/<FORM.+?<\/FORM>//ig;
# $textdata =~ s/<object.+?<\/object>//ig;
# $textdata =~ s/<embed.+?<\/embed>//ig;
$textdata =~ s/<[^>]+>//g;                     # every remaining tag

# Drop character entities.  Only well-formed entities are matched so that a
# bare "&" in running text no longer swallows everything up to the next ";".
$textdata =~ s/&#?[a-zA-Z0-9]+;//g;

# Words are runs of letters, digits, "-", "'" and "/"; everything else
# separates words.
@words = split /[^a-zA-Z0-9\-'\/]+/, $textdata;

my $wordcount = scalar(@words);
my $numlist = 0;                # number of matches recorded so far
my $stringtosearch = '';
my $nounid;
my $maxwords = 10;              # maximum number of words in one buylite string

######### BEGIN RECOGNITION ###########
# Scan @words left to right.  At each position that can start a phrase,
# grow the candidate phrase one word at a time, remember every candidate
# found in %ANNOTATOR::pnlist, and keep the longest one.
while (($i < $wordcount) && ($numlist < $max_annotations)) {

    # Throw out the 1st word if it's not in the first word list
    # This is only to accelerate dramatically the searching
    if (not exists($ANNOTATOR::firstword{$words[$i]})) {
        $i++;
        next;
    }

    # $start anchors the phrase window.  $i is moved past each match inside
    # the inner loop, so bounding the window with $i let the phrase grow
    # beyond $maxwords; "<" (not "<=") keeps it to at most $maxwords words.
    my $start = $i;
    $j = $i;
    $stringtosearch = '';

    while (($j - $start < $maxwords) && ($j < $wordcount)) {
        # Are we not yet at end of file?
        $stringtosearch .= $words[$j];

        if (exists($ANNOTATOR::pnlist{$stringtosearch})) {
            push @tempmatchingids, $stringtosearch;
            $i = $j + 1;    # resume the outer scan after this match
        }

        $j++;
        $stringtosearch .= ' ';
    }

    if (scalar(@tempmatchingids) > 0) {    # did we have earlier full keyphrase matching?
        # The last candidate pushed is the longest one.
        $finallist{ pop @tempmatchingids } = 1;
        $numlist++;
        @tempmatchingids = ();
    }
    else {
        $i++;
    }
}    # while

########### END RECOGNITION ############
$aa = time - $aa;
print STDERR ("$url_unescape $aa\n");    # log the fetched URL and the recognition time

########### Output ###########
my $linkstag;
my $cache_header;
my $outfile;

# Read the pn= flag with a list-context match.  Testing $2 after a match
# that may have failed would read a capture left over from an earlier regex.
my $query = defined $ENV{'QUERY_STRING'} ? $ENV{'QUERY_STRING'} : '';
my ($pn_flag) = $query =~ /(?:^|&)pn=([^&])/;

if ($numlist == 0) {    # No buylites found
    $outfile = 'function makeChanges(){}; function blHighlight(){}';
}
elsif (defined $pn_flag && $pn_flag eq 'n') {    # buylites disabled through cookie
    $outfile = 'function makeChanges(){}; function blHighlight(){ blInitUI(BL_HIDE); }';
}
else {
    # From here onwards we found buylites and they are enabled on the client

    # $url comes from the request and is placed inside a single-quoted
    # JavaScript string, so percent-encode the characters that could end the
    # string or the enclosing <script> element.
    ( my $js_url = $url ) =~ s/([\\'<>\r\n])/sprintf('%%%02X', ord($1))/ge;

    $outfile = <<"EOT";
function makeChanges(){var i,j;var refurl='$js_url';[LINKS_TAG]}
function blHighlight(){TAG_NAME=new Array();TAG_NAME[0]='P';TAG_NAME[1]='LI';TAG_NAME[2]='UL';TAG_NAME[3]='TD';TAG_NAME[4]='DL';var pnshown=0;
for(var i=0;i<TAG_NAME.length;i++){var ps=document.all.tags(TAG_NAME[i]);
for(j=0;j<ps.length;j++){g_html=ps(j).innerHTML;g_outer=ps(j).outerHTML;g_tag=g_outer.substring(0,g_outer.indexOf('>')+1);
if((g_html.length>=5000)||(g_html==''))continue;
for(var p=1;p<pnpatbeg.length;p++)g_html=replacepat(g_html,p);
chgflag=0;pntmplist='';makeChanges();
for(var p=pnpatbeg.length-1;p>0;p--)g_html=expandpat(g_html,p);
if(chgflag==1){var pnrv=1;if(i!=3)pnrv=setOuterHTML(ps(j),g_tag+g_html+'</'+TAG_NAME[i]+'>');else pnrv=setInnerHTML(ps(j),g_html);if(pnrv==0)pnshown=1;}
for(var p=1;p<pnpatbeg.length;p++)pnpattotal[p]=0;}}
if(pnshown==1)blInitUI(BL_SHOW);}
EOT

    # One changeHTML(...) call per recognised proper noun.  The calls are
    # collected first and spliced in with a single substitution; putting
    # "$linkstag[LINKS_TAG]" in a replacement would be parsed as an element
    # of an array named @linkstag.
    my @calls;
    foreach $res (keys %finallist) {
        $linkstag =
              "changeHTML(\"" . $res . "\","
            . "\"http://colostage.bizrate.com/buylites/landing.xpml?keyword=" . $res
            . "&aff_id=-1&noun_id=" . $ANNOTATOR::pnlist{$res}
            . "&rf=buy" . $ANNOTATOR::pnlist{$res}
            . "|-1&ref_url=\" + refurl);";
        push @calls, $linkstag;
    }
    my $links = join '', @calls;
    $outfile =~ s/\[LINKS_TAG\]/$links/;
}

print "HTTP/1.0 200 OK\n";
print "Content-Length: " . length($outfile) . "\n";
print "Cache-Control: max-age=86400\n";
print "Content-type: text/html\n\n";
print $outfile;



© BizRate.com 



GanzLaw, PC 
PO Box 10105 
Portland, Oregon 97296 
Phone: (503)228-3641 
Docket No.: BIZ/01-0003 

Express Mail No: EL627039307US, Deposited June 12, 2001 



-60- 



■SSS?!); 



Appendix B: Example of Client Executable Code In Recognition/ Annotation 
Process 

The following four server-side components are sent by one or more servers to a
client for execution in the following order on the client:

buylites.js (JScript file) [Normally found on content document initially 
executed on a client system] 
bl_style.css (stylesheet) 
bl_vb.js (VBScript file) 
10 bl_start.js (JScript file) 

buylites.js

// Bootstrap script placed on the content page.  On Internet Explorer 4+
// under Windows it writes the tags that pull in the stylesheet and the
// remaining scripts; on any other browser it does nothing.
var BL_BURL = 'http://xxx.xxxxx.com/buylites';
var BL_SURL = BL_BURL+'/bl';
var BL_IURL = BL_SURL+'/images';
var blNavAgt = navigator.userAgent.toLowerCase();
// Explicit radix so the version string is always read as decimal.
var blIsMajor = parseInt(navigator.appVersion, 10);
var blIsIE = (blNavAgt.indexOf("msie") != -1);
var blIsIE4 = (blNavAgt.indexOf("msie 4") != -1);
var blIsIE4up = (blIsIE && (blIsMajor >= 4));
var blIsWin = (blNavAgt.indexOf("windows ")!=-1);

if (blIsIE4up && blIsWin)
{
    // Pages that do not define an affiliate id get -1.
    if (typeof(BL_AFF_ID) == 'undefined') BL_AFF_ID = -1;

    document.write('\
<link href="'+BL_SURL+'/bl_style.css" rel=stylesheet type="text/css">\
<script language=jscript src="'+BL_SURL+'/bl_main.js"></script>\
<script language=VBScript src="'+BL_SURL+'/bl_vb.js"></script>\
<script language=jscript src="'+BL_BURL+'/r1.pl?aff_id='+BL_AFF_ID+'"></script>\
<script language=jscript src="'+BL_SURL+'/bl_start.js"></script>');
}
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MS 



bl_style.css

/* Styles for the annotation popup text (.boxtext*) and for the highlighted
   proper-noun spans (.pn*, .l2/.c2/.r2, .l3/.c3/.r3).
   NOTE(review): several rules write "background-color=..." / "color=..."
   with "=" instead of ":".  That spelling is kept exactly as listed;
   confirm whether the target browser honours it before normalising. */
.boxtext1 {color:#000197;font-family: Arial, Helvetica, sans-serif; font-size: 12; font-weight: bold;}
.boxtext2 {color:#FF6600;font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 16; font-weight: bold;}
.boxtext3 {color:#000000;font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 13; font-weight: bold;}
.boxtext4 {color:blue}
.boxtext5 {color:#000197;font-family: Arial, Helvetica, sans-serif; font-size: 11;}
.boxtext6 {color:black;font-family: Arial, Helvetica, sans-serif; font-size: 12;}
.boxtext7 {color:black;font-family: Arial, Helvetica, sans-serif; font-size: 11;}

.pnlg {background-color=#fcff00;BACKGROUND-IMAGE: url(/buylites/bl/images/cornerg1.gif);BACKGROUND-POSITION:left top;BACKGROUND-REPEAT:no-repeat;cursor:hand}
.pncg {background-color=#fcff00;cursor:hand}
.pnrg {text-decoration:none;background-color=#fcff00;BACKGROUND-IMAGE: url(/buylites/bl/images/cornerg2.gif);BACKGROUND-POSITION:right bottom;BACKGROUND-REPEAT:no-repeat;cursor:hand}

.pnllg {background-color=#fcff00;BACKGROUND-IMAGE: url(/buylites/bl/images/cornerlg1.gif);BACKGROUND-POSITION:left top;BACKGROUND-REPEAT:no-repeat;cursor:hand}
.pnclg {background-color=#fcff00;cursor:hand}
.pnrlg {text-decoration:none;background-color=#fcff00;BACKGROUND-IMAGE: url(/buylites/bl/images/cornerlg2.gif);BACKGROUND-POSITION:right bottom;BACKGROUND-REPEAT:no-repeat;cursor:hand}

.pnl1 {color:#000000;background-color=#fcff00;BACKGROUND-IMAGE: url(/buylites/bl/images/corner1.gif);BACKGROUND-POSITION:left top;BACKGROUND-REPEAT:no-repeat;cursor:hand}
.pnc1 {color:#000000;background-color=#fcff00;cursor:hand}
.pnr1 {color:#000000;background-color=#fcff00;BACKGROUND-IMAGE: url(/buylites/bl/images/corner2.gif);BACKGROUND-POSITION:right bottom;BACKGROUND-REPEAT:no-repeat;cursor:hand}

.l2 {background-color=#11fff1;BACKGROUND-IMAGE: url(/buylites/bl/images/corner1.gif);BACKGROUND-POSITION:left top;BACKGROUND-REPEAT:no-repeat;}
/* NOTE(review): the listing shows "pattern1 blue.gif"; the underscore is
   assumed by analogy with pattern1_dotblue.gif below - verify the file name. */
.c2 {background-image: url(/buylites/bl/images/pattern1_blue.gif);background-position: left bottom;background-repeat:repeat-x}
.r2 {BACKGROUND-IMAGE: url(/buylites/bl/images/pattern1_dotblue.gif);BACKGROUND-POSITION:center top;BACKGROUND-REPEAT:no-repeat;}

.l3 {background-color=#11fff1;BACKGROUND-IMAGE: url(/buylites/bl/images/corner1.gif);BACKGROUND-POSITION:left top;BACKGROUND-REPEAT:no-repeat;cursor:hand}
.c3 {background-color=#11fff1;cursor:hand}
.r3 {text-decoration:none;background-color=#11fff1;BACKGROUND-IMAGE: url(/buylites/bl/images/corner2.gif);BACKGROUND-POSITION:right bottom;BACKGROUND-REPEAT:no-repeat;cursor:hand}

.pnspan {text-decoration:none;color=#000099;background-color=#CCCCCC;BACKGROUND-IMAGE: url(http://images.bizrate.com/site/un_b_bold.gif);BACKGROUND-POSITION:left bottom;BACKGROUND-REPEAT:repeat-x;}
.pnspan2 {background-color=#11fff1; cursor:hand}
.pnspan3 {background-color=#11fff1;}

.pnul {text-decoration:none; color:#000000; background-color=#fcff00; BACKGROUND-IMAGE: url(/buylites/bl/images/underline_buylites.gif);BACKGROUND-POSITION:left bottom; BACKGROUND-REPEAT:repeat-x; cursor:hand;}



bl_vb.js

' Assigns val to elem.innerHTML and returns Err.Number, so the caller gets
' 0 on success and the error number when the assignment failed.
Function setInnerHTML(elem, val)
    ON ERROR RESUME NEXT
    elem.innerHTML = val
    setInnerHTML = Err.Number
END Function

25 

' Assigns val to elem.outerHTML and returns Err.Number, so the caller gets
' 0 on success and the error number when the assignment failed.
Function setOuterHTML(elem, val)
    ON ERROR RESUME NEXT
    elem.outerHTML = val
    setOuterHTML = Err.Number
END Function



bl_start.js

// Number of times blShowBLs() has found the server-generated functions missing.
var blTry = 0;

// Runs blHighlight() once both blHighlight and makeChanges exist.  Until
// then it re-schedules itself after BL_DELAY_RETRY (defined outside this
// file), giving up once blTry reaches 10.
function blShowBLs()
{
    if (typeof(blHighlight) == 'function' && typeof(makeChanges) == 'function')
    {
        blHighlight();
    }
    else
    {
        blTry++;
        if (blTry < 10)
            setTimeout('blShowBLs();', BL_DELAY_RETRY);
    }
}

window.onload = blShowBLs;
© BizRate.com 
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A ppendix C: Example of Client-Side Annotation and Recognition Code 



Defiler.cpp
GnomeIEPageBroker.cpp
HTMLAnnotatorProxy.cpp
HTMLParser.cpp
IEDocUtils.cpp
ProperNounDB.cpp



10 Defiler.cpp 

#include <stdafx.h> 
a #include "Defiler.h" 

\D #include "HTMLParser.h" 

CO #include "ProperNounDB.h" 

CO 15 #include "bizratestring.h" 
H #include <sstream> 

i=i #include "ebRegistryMgr.h" 

in #include "eBBarRegistryMgr.h" 

IU #include "eBconst.h" 

b 20 #include "eBScriptUtil.h" 
13 #include "lEDoclItils.h" 

M using namespace std; 

O 25 const int Defiler::AFFILIATE_ID = -2; 

// Builds a Defiler around pnTable and computes annotationServletURL as
// serverBaseURL followed by the redirect-servlet path read from the
// registry.  When the registry value is empty the error is logged and
// annotationServletURL is left unset.
Defiler::Defiler(ProperNounTable *pnTable,const string &serverBaseURL) {
    this->pnTable=pnTable;  // This object takes ownership of the pnTable.

    eBRegistryMgr regMgrLM;
    regMgrLM.InitAppKey( HKEY_LOCAL_MACHINE, eb_eboodle );

    // Get Version servlet
    CComBSTR redirectServlet;
    regMgrLM.GetProfileString(RS_PRPOERNOUN, RS_PN_REDIRECT_SERVLET,
                              &redirectServlet );

    if (redirectServlet.Length() == 0)
    {
        WriteLog(EBLOG_OBJECT, 0, "Defiler::Defiler: Invalid redirect servlet\n");
        return;
    }

    _bstr_t bstrRedirectServlet = redirectServlet.m_str;
    annotationServletURL=serverBaseURL+((char *)bstrRedirectServlet);
}



// Releases the proper-noun table this object took ownership of in its
// constructor.
Defiler::~Defiler(){
    delete pnTable;
}

15 

// Looks for proper nouns in originalHTML and, when any are found, rewrites
// elem with annotated markup.
//
// bAddSpanTag is both input and output: when false on entry the page script
// is installed through InsertProperNounScript() and the flag is set to true
// if that returns S_OK.  Annotation only happens while the flag is true.
//
// Returns a heap-allocated copy of the annotated HTML that the caller must
// delete, or NULL when nothing was annotated.
string *Defiler::AnnotateHTML(const string *originalHTML, IHTMLElement *elem,
        CComPtr<IWebBrowser2> pIWebBrowser, IDispatch* pDisp, bool &bAddSpanTag) {
    string *annotatedHTML = NULL;

    HTMLParser parser(*originalHTML);
    const DocumentWordList *docWords=parser.GetDocumentWords();

    ANNOTATION_SET *annSet=GetAllAnnotations(docWords,originalHTML);
    if(annSet->size()>0)
    {
        if (!bAddSpanTag)
        {
            CComPtr<IWebBrowser2> pMainWebBrowser;
            // pDisp was dereferenced unconditionally; with a null pDisp the
            // browser pointer now stays null and InsertProperNounScript()
            // reports S_FALSE.
            if(pDisp)
                pDisp->QueryInterface(IID_IWebBrowser2,(void **)&pMainWebBrowser);

            HRESULT hInsertedCode = InsertProperNounScript(pIWebBrowser,
                                                           pMainWebBrowser);
            if(hInsertedCode == S_OK)
                bAddSpanTag = true;
        }

        if(bAddSpanTag)
            annotatedHTML=InsertPopup(annSet,originalHTML, elem,
                                      pIWebBrowser, pDisp);
    }

    delete annSet;
    return annotatedHTML;
}

// Calls the page script function "popupx" with the document of the browser
// obtained from pDisp.  Returns S_OK only when the call yields the VT_I4
// value 1; S_FALSE when no document can be obtained or for any other result.
HRESULT Defiler::InsertProperNounScript(CComPtr<IWebBrowser2> pIWebBrowser,
        IDispatch* pDisp)
{
    CComPtr<IWebBrowser2> pmainWebBrowser;
    if(pDisp)
        pDisp->QueryInterface(IID_IWebBrowser2,(void **)&pmainWebBrowser);

    CComPtr<IHTMLDocument2> pdoc;
    if(pmainWebBrowser)
    {
        CComPtr<IDispatch> pdispDoc;
        pmainWebBrowser->get_Document(&pdispDoc);
        // get_Document can leave the pointer null, so check it before use.
        if(pdispDoc)
            pdispDoc->QueryInterface(IID_IHTMLDocument2,(void **)&pdoc);
    }
    if(!pdoc)
        return S_FALSE;

    CComVariant variantList[1];
    variantList[0].vt = VT_DISPATCH;
    pdoc->QueryInterface(IID_IDispatch,(void **)&(variantList[0].pdispVal));

    CComVariant vResult;
    eBScript::ExecFunc( pIWebBrowser, L"popupx", 1, variantList, &vResult );
    if (vResult.vt == VT_I4 && vResult.intVal == 1)
        return S_OK;
    else
        return S_FALSE;
}

25 

// Rebuilds originalHTML with each annotated range replaced by the span
// markup returned by GetSpanHTML(), writes the result into elem through
// IEDocUtils::SetHTML(), and returns a heap-allocated copy that the caller
// must delete.
//
// The ranges in annSet are consumed in iteration order; the copy position
// only moves forward.
// NOTE(review): this assumes the set iterates in ascending start index
// without overlaps - confirm against ANNOTATION_SET's ordering.
string *Defiler::InsertPopup(ANNOTATION_SET *annSet,const string *originalHTML,
        IHTMLElement *elem, CComPtr<IWebBrowser2> pIWebBrowser,
        IDispatch* pDisp) {
    ostringstream oss;

    ANNOTATION_SET::const_iterator iter=annSet->begin();
    ANNOTATION_SET::const_iterator theEnd=annSet->end();

    int nextPlaceToCopy=0;

    while(iter != theEnd) {
        const AnnotationWrapper &wrapper=*iter;
        const Annotation *annotation=wrapper.GetAnnotation();
        int startPos=annotation->GetStartIndex();
        int endPos=annotation->GetEndIndex();

        // Text of the proper noun itself, and the untouched text before it.
        string htmlPN = originalHTML->substr(startPos,endPos-startPos);
        string htmlStart = originalHTML->substr(nextPlaceToCopy,
                                                startPos-nextPlaceToCopy);
        oss << htmlStart;

        string *annotationURL=GenerateAnnotationURL(*annotation, htmlPN);

        CComBSTR bstrSpan;
        GetSpanHTML(pIWebBrowser, pDisp, htmlPN, *annotationURL, bstrSpan);
        // The URL string is heap-allocated by GenerateAnnotationURL() and
        // was never released.
        delete annotationURL;

        if (bstrSpan.Length() > 0)
        {
            _bstr_t tmpSpan = bstrSpan.m_str;
            oss << (char *)tmpSpan;
        }
        else
        {
            // No span markup came back: keep the original text instead of
            // dropping the proper noun from the page.
            oss << htmlPN;
        }

        nextPlaceToCopy=endPos;
        iter++;
    }

    oss << originalHTML->substr(nextPlaceToCopy);

    //string outputString = string(oss.str());
    //WriteLog(EBLOG_OBJECT, 0, "Defiler::InsertPopup: All_HTML=%s\n",
    //         outputString.c_str());

    string annotatedHTML = oss.str();
    IEDocUtils::SetHTML(elem, annotatedHTML.c_str());

    return new string(annotatedHTML);
}

// Calls the page script function "getSpanTag" with (urlPN, strPN, pDisp) and
// stores the returned string in bstrSpan.  Returns S_OK when the script
// returned a BSTR, S_FALSE otherwise (bstrSpan is then left untouched).
HRESULT Defiler::GetSpanHTML(CComPtr<IWebBrowser2> pIWebBrowser, IDispatch*
        pDisp,
        const string &strPN, const string &urlPN, CComBSTR
        &bstrSpan)
{
    // pDisp is dereferenced below; report failure rather than crash.
    if(!pDisp)
        return S_FALSE;

    CComVariant variantList[3];

    CComBSTR bstrURL = urlPN.c_str();
    variantList[0].vt = VT_BSTR;
    variantList[0].bstrVal = bstrURL.Copy();

    CComBSTR bstrPN = strPN.c_str();
    variantList[1].vt = VT_BSTR;
    variantList[1].bstrVal = bstrPN.Copy();

    variantList[2].vt = VT_DISPATCH;
    pDisp->QueryInterface(IID_IDispatch,(void **)&(variantList[2].pdispVal));

    CComVariant vResult;
    eBScript::ExecFunc( pIWebBrowser, L"getSpanTag", 3, variantList, &vResult );

    if (vResult.vt == VT_BSTR)
    {
        bstrSpan = vResult.bstrVal;
        return S_OK;
    }
    else
        return S_FALSE;
}



20 ANNOTATION_SET *Defiier::GetAIIAnnotations(const DocumentWordList 

*docWords,const string *originalHTML) { 

ANNOTATIONJ3ET *set=new ANNOTATION_SET; 

unsigned int numWords=docWords->NumWords(); 
25 unsigned int currentWord=0; 

while(currentWord<numWords) { 

ProperNounTable *curfable=pnTable; 

const DocumentWord *firstWord=docWords->GetWord(currentWord); 
30 const DocumentWord *lastWord=firstWord; 

charfirstChar=originalHTML->at(firstWord->GetStringlndex()); 
if( (::isUpperCaseLetter(firstChar)) || (::isNumber(firstChar)) ) { 
// Only check for annotations if the first character is 
35 // upper case or a number. 

while(true) { 

if(currentWord>=numWords) 
break; 

40 const DocumentWord *tmpWord=docWords- 

>GetWord(currentWord); 

const string *word=tmpWord->GetWord(); 

ProperNounTable *nextTable=curTable- 
45 >GetTableForWord(*word,false); 

if(nextTable==NULL) 
break; 
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M 



curTable=nextTable; 
lastWord=tmpWord; 
currentWord++; 



} 



if(curTable==pnTable) { 

// Never annotate at the root table. Skip over this word 
10 // becayse it matched nothing. 

currentWord++; 

} else { 

// Okay, so some words matched. See if there is a proper noun 
// associated with the last table that contained a proper noun 
15 //word. 

const ProperNoun *properNoun=curTable->GetProperNoun(); 
if(properNoun!=NULL) { 

// Found a proper noun match. Add the annotation. 
Annotation annotation; 
20 annotation. SetAssociatedProperNoun(properNoun); 

an notation . SetStartl nd ex(f irstWord->GetStri ng I nd ex( )); 
an notation . SetEnd I ndex(IastWo rd- 
>GetStringIndex()+lastWord->GetWord()->si2e()); 

set->insert(AnnotationWrapper(&annotation)); 

25 } 
} 

} 

return set; 

30 } 



35 string *Defiler::GenerateAnnotationllRL(const Annotation &annotation,string Keywords ) 
{ 

const ProperNoun *pn=annotation.GetAssociatedProperNoun(); 
ostringstream oss; 

40 oss « annotationServletURL «"?keyword="«Keywords« "&aff_id=" « 

AFFILIATEJD 

« "&noun_id=" « pn->Getld() « "&fb=1&rf=bar H «pn->Getld() « 

aff_id=" ; 

45 return new string(oss.str()); 
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GnomelEPaqeBroker.cpp 

#include <stdafx.h> 
#include "GnomelEPageBroker.h" 
5 #include "lEDocUtils.h" 
#include "GnomeConfig.h" 
#include "ListLoader.h" 
#include <string> 
#include "eBScriptUtii.h" 

10 

using namespace std; 



////Defiler *GnomelEPageBroker::defiIer = NULL; 

15 

////int Gnome! EPageBroker::defilerRefCount = 0; 

//BRLock *GnomelEPageBroker::refCountLock = LockFactory::MakeLock(); 

20 

\Q GnomelEPageBroker::GnomelEPageBroker(GnomeBaseConfig *config, int 

CO pnListServerVersion) { 

CO II This object takes ownership of config. 

H 25 defiler=NULL; 

W this->config=config; 

|R Init(pnListServerVersion); 

e . i 

r } 

£3 30 



GnomelEPageBroker::-GnomeIEPageBroker() { 
delete annotatorProxy; 
35 delete config; 

// Decrease the reference count on the defiler. If the count is 
// zero, delete it; otherwise, keep it around because other people 
// are using it. 
40 /*refCountLock->Lock(); 

defilerRefCount-; 
if(defilerRefCount==0) { 
7 

45 if(defiler) 

delete defiler; 
defiler=NULL; 
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////} 

////refCountLock->Unlock(); 



int GnomelEPageBroker::ProcessAIIFrames(CComPtr<IWebBrowser2> plWebBrowser, 
IDispatch * pDisp) { 

if( (defiler==NULL)||!pDisp) 
10 { 

WriteLog("GnomelEPageBroker::ProcessAIIFrames: annotations off\n"); 
return 0; // Disabled annotations. 

} 

CComPtr<IWebBrowser2> pmainWebBrowser; 
15 if(pDisp) 

pDisp->Querylnterface(IID_IWebBrowser2,(void **)&pmainWebBrowser); 
CComPtr<IHTMLDocument2> pdoc; 
if(pmainWebBrowser) 
{ 

20 CComPtr<IDispatch> pdispDoc; 

pmainWebBrowser->get_Document(&pdispDoc); 
pdispDoc->Querylnterface(IID_IHTMLDocument2,(void **)&pdoc); 

} 

if(!pdoc) 
25 return 0; 

CComVariant variantList[1]; 
variantl_ist[0].vt = VTJDISPATCH; 

pdoc->Querylnterface(IID_IDispatch ) (void **)&(variantList[0].pdispVal)); 
CComVariant vResult; 

30 

eBScript::ExecFunc( plWebBrowser, U'isValidtoAnnotate", 1 , variantList, 
SvResuIt ); 

if (vResult.vt == VT_I4 && vResult.intVai == 1) 
{ 

35 ProcessPage(pdoc,true, pDisp, plWebBrowser); 

} 

return 0; 

} 

int GnomeIEPageBroker::ProcessPage(IHTMLDocument2 *doc,booI updateURLList, 
40 IDispatch* pDisp, CComPtr<IWebBrowser2> plWebBrowser) { 
//Annotate a single page. 

WriteLog("GnomelEPageBroker::ProcessPage: in\n"); 
/*if(updatellRLList) { 

char *url=IEDocUtils::GetURL(doc); 
45 urlList.AddURL(url); 

deletef] url; 

}*/ 
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// Annotate the document. 

IEDocUtils::WalkThroughDocument(annotatorProxy, doc, plWebBrowser, pDisp); 

5 return 0; 

} 



//bool GnomelEPageBroker::Enabled() { 
10 // return config->GetAnnotationsEnabled(); 
11} 



void GnomelEPageBroker::lnit(int pnListServerVersion) { 
1 5 annotatorProxy=NULL; 

if(defiler==NULL) { 

// Only do these steps if the defiler has not been set up yet. 
string *serverURLBase=config»>GetServerURLBase(); 
20 ListLoader loader(*serverURLBase); 

delete serverURLBase; 
int clientPNListVer = config->GetPNListVersion(); 

if(pnListServerVersion > clientPNListVer) { 
// A newer version is available. 
25 // Load the proper noun list from the network. 

loader.GetProperNounsFromURL(new 
AsynchronousLoadCallback(this),pnListServerVersion); 
} else { 

// A newer version is not available. Load it from the 
30 //filesystem. 

string *cacheFilename=config->GetCacheFilename(); 
ProperNounTable 
*pnTable=loader.GetProperNounsFromFile(*cacheFilename); 
delete cacheFilename; 
35 if(pnTable==NULL) { 

// Exception. 

// Something happened to the proper noun list in the 
// filesystem. Load it from the network, 
loader. GetProperNounsFromURL(new 
40 AsynchronousLoadCallback^hisJ^nListServerVersion); 

} else { 

SetDefiler(pnTable); 

} 

} 

45 }eise{ 

SetAnnotatorProxy(); 

} 
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} 



5 void GnomelEPageBroker::SetAnnotatorProxy() { 
delete annotatorProxy; 

annotatorProxy=new HTMLAnnotatorProxy(defiler,config); 

////refCountLock->Lock(); 
1 0 ////defilerRefCount++; 

////refCountLock->Unlock(); 



15 void GnomelEPageBroker::SetDefiIer(ProperNounTable *pnTable) { 
// This object takes ownership of pnTable. 
delete defiler; 

string *annotationURLBase=config->GetAnnotationURLBase(); 
defiler=new Defiler(pnTable,*annotationURLBase); 
20 delete annotationURLBase; 

////defilerRefCount=0: 



25 



40 



SetAnnotatorProxy(); 



int GnomelEPageBroker::TakeLoad(ProperNounTable *pnTable,const std::string 
*pnText, unsigned int latestVersion) { 

// Create a new defiler from the proper noun table. Then, store the 
30 // latest proper noun list version and the list text in a file. 

// set the latest version in the registry 
eBRegistryMgr regMgrLM; 

regMgrLM.InitAppKey( H KE Y_LOCAL_M ACH I N E , eb_eboodle ); 
35 wcharj wc[100]; 

jtow( latestVersion , wc, 10); 
regMgrLM.WriteProfileString(RS_PRPOERNOUN, 
RS_PN_CLIENT_PN_VERSION, wc); 
SetDefiler(pnTable); 



string *cacheFilename=config->GetCacheFilename(); 
ListLoader::StoreProperNounslnFile(*cacheFilename,*pnText); 
delete cacheFilename; 



45 return 0; 
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HTMLAnnotatorProxy.cpp 

#include <stdafx.h> 
#include "HTMLAnnotatorProxy.h" 
5 #include "lEDocUtils.h" 
#include <string> 
#include <sstream> 
#include "eBScriptlltil.h" 

10 using namespace std; 



// Binds the proxy to defiler.  In debug mode a DebugHTMLWriter targeting
// the configured cache directory is created; otherwise the writer is NULL.
HTMLAnnotatorProxy::HTMLAnnotatorProxy(Defiler *defiler,GnomeBaseConfig *config)
{
    this->defiler=defiler;

    //RelevantTags::init();

    if(config->InDebugMode()) {
        string *cacheDir=config->GetCacheDirectory();
        debugHTMLWriter=new DebugHTMLWriter(cacheDir->c_str());
        delete cacheDir;
    } else
        debugHTMLWriter=NULL;

    annotationNum=0;
}



// Releases the debug writer (NULL outside debug mode).
HTMLAnnotatorProxy::~HTMLAnnotatorProxy() {
    delete debugHTMLWriter;
}



int HTMLAnnotatorProxy::Process(CComPtr<IWebBrowser2> pIWebBrowser, IDispatch* 
35 pDisp, IHTMLEIement *elem, bool &bAddSpanTag) 
{ 



40 



USES_CONVERSION; 



//CComBSTR tagNameBSTR; 

//HRESULT result=elem->getJagName(&tagNameBSTR); 



//if(FAILED(result)) 
45 // return -1 ; 
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Ids 



10 



15 



//_bstr_ttagNameWrapper(tagNameBSTR.m_str); // tagNameWrapper takes 
ownership of tagNameBSTR. 

//if(!RelevantTags::lsRelevantTag(&tagNameWrapper)) 
// return 0; 

int elemCountAllowed = isElementValidity(plWebBrowser ) elem); 
if( lelemCountAilowed ) 
return 0; 

//char *origHTML=IEDocUtils::GetHTML(elem); commented by RAJ 

CComBSTR htmlBSTR; 

eiem->getjnnerHTML(&htmlBSTR); 



if( elemCountAllowed > 0 && elemCountAllowed != 1 ) 
{ 

if(htmlBSTR.m_str && (wcslen(htmlBSTR.m_str) > elemCountAllowed)) 
return 0; 

20 } 
5 if( (htmlBSTR.m_str != NULL) ) { 

CO //if( (htmlBSTR.m_str != NULL) && (wcslen(htmlBSTR.m_str)<4000) ) { 

CO- string *annotatedHTML=defiler- 

H 25 >AnnotateHTML(&string(W2A(htmlBSTR.m_str)), elem, plWebBrowser, pDisp, 
W bAddSpanTag); 

W if(annotatedHTML != NULL) { 

e II There is an annotation, so do it. 

30 if(debugHTMLWriter != NULL) { 

// Write out the changes to a file, 
string *filePrefix=GetHTMLWriterFilename(); 
const char *filePrefixCStr=filePrefix->c_str(); 
//debugHTMLWriter- 
35 > WriteHTM L(filePref ixCStr.origHTML, ORIG I NAL_HTM L); 

debugHTMLWriter- 
>WriteHTML(filePrefixCStr,annotatedHTML->c_str(),ANNOTATED_HTML); 

delete filePrefix; 

} 

40 

// WriteLog("HTMLAnnotatorProxy::Process: Before IEDocUtils::SetHTML\n"); 

// SetSpanHTML(plWebBrowser, pDisp, elem,annotatedHTML->c_str()); 
// IEDocUtils::SetHTML(elem,annotatedHTML->c_str()); 
delete annotatedHTML; 

45 } 
} 
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//deleteQ origHTML; 
return 0; 



40 



} 



int HTMLAnnotatorProxy::isElementValidity(CComPtr<iWebBrowser2>& 

plWebBrowserJHTMLEIement * elem) 

{ 

if(!elem) 
1 0 return 0; 

CComVariant variantRet; 
CCom Variant variantList[1]; 
variantList[0].vt = VT_DISPATCH; 
1 5 elem->Query!nterface(iID_IDispatch ) (void **)&(variantl_ist[0].pdispVal)); 

eBScript::ExecFunc( pIWebBrowser, U'isValidElement", 1 , variantList, &variantRet ); 
if(variantRet.vt == VTJ4 && variantRet. I Val == 1 ) 
return 1 ; 

else if( variantRet. vt == VTJ4 && variantRet. IVal > 1 ) 
20 return variantRet. IVal ; 

else 

return 0; 

25 int HTMLAnnotatorProxy::SetSpanHTML(CComPtr<IWebBrowser2> pIWebBrowser, 

IDispatch* pDisp, 

IHTMLEIement *elem,const char *html) { 
int retval=0; 
CComVariant variantList[3]; 

30 

CComBSTR strHTML = html; 
variantList[0].vt = VT_BSTR; 
variantList[0].bstrVal = strHTMLCopy(); 

35 variantList[1].vt = VT_DISPATCH; 

elem->Querylnterface(IID_IDispatch I (void **)&(variantList[1].pdispVal)); 

variantList[2].vt = VT_DISPATCH; 

pDisp->Querylnterface(IID_IDispatch,(void **)&(variantl_ist[2].pdispVal)); 

eBScript::ExecFunc( pIWebBrowser, L"addSpanTag", 3, variantList, NULL ); 
return retval; 

} 

45 void HTMLAnnotatorProxy::SetDefiler(Defiler *defiler) { 
this->defiler=defiler; 

} 
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10 



15 



30 



35 



45 



// Returns a heap-allocated file-name prefix "X_<n>" (caller deletes), where
// n is the current annotationNum; the counter is incremented on each call.
string *HTMLAnnotatorProxy::GetHTMLWriterFilename() {
    ostringstream oss;
    oss << "X_" << (annotationNum++);
    return new string(oss.str());
}



* RelevantTags implementation. * 

* * 

*************************************** 



/* 

int RelevantTags::init() { 

20 // Initialize the static members of the class if they have not already 

3 II been initialized. 

B if(!islnitialized) { 
§ islnitialized=true; 
0 AddRelevantTags(); 
A 25 return 1 ; 

« } 



return 0; 



bool RelevantTags: :lsRelevantTag(const_bstr_t *tagWrapper) { 
return (tagset.find(*tagWrapper) != tagset.end()); 

} 



void RelevantTags: :AddRelevantTags() { 
// Add the relevant tags to the set. 
tagset.insert(_bstr_t("TD")); 
40 // tagset.insert(_bstr_t("FORM")); 
tagset.insert(_bstrJ( H P n )); 
tagset.insertLbstr_t("LI H )); 
tagset.insert(_bstr_t("UL")); 

} 



TAG_BSTR_SET RelevantTags: :tagset; 
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HTMLParser.cpp 

#include <stdafx.h> 
#include "HTMLParser.h" 
5 #include "bizratestring.h" 
#include "StringTokenizer.h" 

using namespace std; 

10 

// Delimiter sets handed to the StringTokenizer in Parse().
const char *HTMLParser::DISCARDED_HTML_DELIMITERS = " \n\t";
const char *HTMLParser::INCLUDED_HTML_DELIMITERS = "<>";

// Tags whose bodies are never annotated.  The two arrays are parallel: the
// index returned by IsExcludedBeginTag() selects the end tag that
// IsExcludedEndTag() waits for.
const string HTMLParser::EXCLUDED_BEGIN_TAGS[] = {
    "<textarea","<a","<form","<option","<object","<embed"};
const string HTMLParser::EXCLUDED_END_TAGS[] = {
    "</textarea","</a","</form","</option","</object","</embed"};

// Number of entries in each of the two arrays above.
const unsigned int HTMLParser::numExcludedTags = 6;

P 25 

// Parses html immediately; the resulting words are available through
// GetDocumentWords().
HTMLParser::HTMLParser(const string &html) {
    Parse(html);
}



// Nothing to release explicitly.
HTMLParser::~HTMLParser() {
}



// Returns the word list built by Parse().  The pointer refers to a member
// of this parser and is only valid while the parser is alive.
const DocumentWordList *HTMLParser::GetDocumentWords() {
    return &docWords;
}



int HTMLParser::Parse(const string &theHTML) { 
45 StringTokenizer 

tokenizer(theHTML,DISCARDED_HTML_DELIMITERSJNCLUDED_HTML_DELIMITER 
S,true); 
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10 



45 



int numTokens=tokenizer.NumTokens(); 

bool skipUntilEndOfTag=false; 

int wordlndex=-1; 

int excludedTaglndex=-1 ; 

for(int i=0;i<numTokens;i++) { 
wordlndex++; 

const string *token=tokenizer.GetToken(i); 
// Check if we're skipping over an anchor. 



// See if we're skipping over a tag whose body should be 
// completely exluded from annotation. 
1 5 if(excludedTag Index != -1 ) { 

// WeYe skipping over an exluded tag and its body 
if(lsExcludedEndTag(token,excludedTaglndex)) 

excludedTag!ndex=-1 ; 
continue; 

20 } 

B if( (excludedTaglndex=lsExcludedBeginTag(token)) != -1) 

0 continue; // Skip over this tag's body. 

fk 25 // Check if we're skipping over a tag. 

d if(skipUntilEndOfTag) { 

Ft if(token->at(0)== , >') 

y skipUntilEndOfTag=false; // Don't worry about getting 

rid of the "<" — it'll be trimmed in the code below. 

3 30 else 

n continue; 

1 ] 

1 if(token->at(0)== , < 1 ) { 

4 35 skipUntilEndOfTag=true; 

continue; 

} 

// Add the word to the doc. 
40 int leftpos,rightpos; 

GetTokenTrimIndices(token J &leftpos J &rightpos); 



if(leftpos>rightpos) 
continue; 

string *copy; 

if( (rightpos<token->size()-1) || (leftpos>0) ) 



GanzLaw, PC 
PO Box 10105 
Portland, Oregon 97296 
Phone: (503)228-3641 
Docket No.: BIZ/01-0003 

Express Mail No: EL627039307US, Deposited June 12, 2001 



-81 - 



10 } 



40 



45 



copy=new string(token->substr(leftpos f rightpos+1 )); 

else 

copy=new string(*token); 

docWords.AddWord(*copy,tokenizer.GetTokenPosition(i)+leftpos); 
delete copy; 

} 

return 0; 



void HTMLParser::GetTokenTrimIndices(const string *token,int *leftPoslndex,int 
*rightPoslndex) { 

1 5 // Return the indices into token at which non-alphanumeric characters 

// at either end stop. 

// Trim non-alphanumeric characters on the right side, 
int rightpos=token->size()-1 ; 
while(rightpos>=0) { 
20 char ch=token->at(rightpos); 

if( (!::isLetter(ch)) && (!::isNumber(ch)) ) 
rightpos-; 

else 

break; 

25 } 

// Trim non-alphanumeric characters on the left side, 
int leftpos=0; 

while(leftpos<=rightpos) { 
30 char ch=token->at(leftpos); 

if( (!::isLetter(ch)) && (!::isNumber(ch)) ) 
leftpos++; 

else 

break; 

35 } 



} 



*leftPoslndex=Ieftpos; 
*rightPosindex=rightpos; 



int HTMLParser::lsExcludedBeginTag(const string *token) { 
int rightpo=token->size()-1; 

// checks character by character for a matching tag ; 
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for(int i=0;i<numExcludedTags;i++) { 

const string *val = &EXCLUDED_BEGIN_JAGS[i] ; 
int ieftpo=0; 

if ( val->size() == token->size() ) { 
5 while(leftpo<=rightpo) { 

char ch=token->at(leftpo); 
char ch1=val->at(leftpo); 

if ( (ch1 == ch || abs(ch -ch1 ) == 32 ) ) 
10 leftpo++; 

else 

break ; 

} 

if ( leftpo == rightpo + 1 ) 
1 5 return i; 

} 



} 

20 return -1; 



25 



30 



bool HTMLParser::lsExcIudedEndTag(const string *tokenjnt taglndex) { 

// checks character by character for a matching tag ; 
int rightpo=token->size()-1; 

const string *val = &EXCLUDED_END_TAGS[tagIndex] ; 
int leftpo=0; 

while(leftpo<=rightpo) { 

char ch=token->at(leftpo); 
char ch1=val->at(leftpo); 

35 if ( (ch1 == ch || abs(ch -ch1) == 32 ) ) 

leftpo++; 

else 

break ; 

} 

40 if ( leftpo == rightpo + 1 ) 

return true; 



// if(*token==EXCLUDED_BEGIN_TAGS[i]) 
45 // return i; 

return false; 
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//returntolowercopy(token)==tolowercx)py(&EXCLUDED_END_TAGS[taglndex]); 

} 



P 

m 
m 

w 

to 
U 
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o 
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lEDocUtils.cpp 

#include <stdafx.h> 
#include "lEDocUtils.h" 
5 #include <ExDisp.h> 
#include "bstrutil.h" 
#include "eBScriptUtil.h" 
using namespace std; 

10 // Function prototypes. 

// File-local helper; declared static so the prototype agrees with the
// static definition further down in this file.
static void AddDocumentAndDescendents(IHTMLDocument2 *doc,DOCUMENT_VECTOR *vec);



15 



20 



30 



40 



IHTMLDocument2 *IEDocUtils::GetMainDocument(IWebBrowser2 *browser) {
    // Return the main document of the web browser, or NULL if the browser
    // has no document or the document does not expose IHTMLDocument2.
    // The returned pointer carries a reference: the caller must Release() it.
    IHTMLDocument2 *doc=NULL;
    IDispatch *dispatch=NULL;

    HRESULT result=browser->get_Document(&dispatch);
    // get_Document can report success while handing back no object (nothing
    // loaded yet), so the pointer is checked as well as the HRESULT.
    if(SUCCEEDED(result) && dispatch != NULL) {
        result=dispatch->QueryInterface(IID_IHTMLDocument2,(void **) &doc);
        if(FAILED(result))
            doc=NULL;
        dispatch->Release();
    }
    return doc;
}



char *IEDocUtils::GetURL(IHTMLDocument2 *doc) {
    // Return the URL of the document as a character array produced by
    // getCharArrayFromBSTR, or NULL if the URL cannot be obtained.
    // NOTE(review): presumably the caller destroys the returned array, as
    // documented for GetHTML(IHTMLElement *) -- confirm against the helper.
    char *theURL = NULL;

    BSTR urlBSTR = NULL;
    HRESULT result=doc->get_URL(&urlBSTR);
    // Same guard as GetHTML(IHTMLElement *): never convert a NULL BSTR.
    if( (SUCCEEDED(result)) && (urlBSTR != NULL) ) {
        theURL=getCharArrayFromBSTR(urlBSTR);
        ::SysFreeString(urlBSTR);
    }

    return theURL;
}



45 char *IEDocUtils::GetHTML(IHTMLDocument2 *doc) { 
// Return the HTML of the document, 
char *html = NULL; 
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10 } 



20 



25 



IHTMLEiement *body; 
HRESULTresult=doc->get_body(&body); 

if(SUCCEEDED(result)) { 

html=IEDocUtils::GetHTML(body); 
body->Release(); 

} 

return html; 



char * I EDocUtils::GetHTML( IHTMLEiement *elem) { 

// Get the HTML of the specified element. This method returns NULL 
15 //if there is an error. Destroy the character array that is returned 

// when you're finished with it. 
char *theHTML=NULL; 
BSTR htmlBSTR; 

HRESULTresult=elem->get_innerHTML(&htmlBSTR); 



if( (SUCCEEDED(result)) && (htmlBSTR != NULL) ) { 
theHTML=getCharArrayFromBSTR(htmlBSTR); 
: :SysFreeString(htmlBSTR); 

} 

return theHTML; 



30 int IEDocUtils::WalkThroughDocument(HTMLEIementProcessor 
*processor,IHTMLDocument2 *doc, CComPtr<IWebBrowser2> plWebBrowser, 
IDispatch* pDisp) { 

// Go through every element in the document and run the specified 
// processor on it. 
35 WriteLog("IEDocUtils::WalkThroughDocument: in\n M ); 

int retval=0; 
bool bAddSpanTag=false; 

IHTMLEIementCollection Collection; 
40 HRESULT result=doc->get_all(&collection); 

if(SUCCEEDED( result)) { 
long num Items; 

result=collection->getJength(&numltems); 

45 for(long i=0;i<numltems;i++) { 

// Iterate through each element and send it to the processor. 
VARIANT nameVar; 
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nameVar.vt=VTJ4; 
nameVar.lVal=i; 

VARIANT emptyVar = { 0 }; 

IDispatch *dispatch; 

result=coIlection->item(nameVar,emptyVar 1 &dispatch); 



1 0 if( (SUCCEEDED(result)) && (dispatch != NULL) ) { 

IHTMLEIement *elem = NULL; 

result=dispatch->Querylnterface(IID_IHTMLEIement,(void 



*) &elem); 
bAddSpanTag); 



if(SUCCEEDED(result)) { 
15 processor->Process(plWebBrowser, pDisp, elem, 

elem->Release(); 

} 



20 dispatch->Release(); 

} 

} 

collection->Release(); 
25 if(bAddSpanTag) 
{ 

CComPtr<IWebBrowser2> pmainWebBrowser; 
if(pDisp) 

pDisp->Querylnterface(IID_IWebBrowser2,(void 

30 **)&pmainWebBrowser); 

CComPtr<IHTMLDocument2> pdoc; 

if(pmainWebBrowser) 

{ 

CComPtr<IDispatch> pdispDoc; 
35 pmainWebBrowser->get_Document(&pdispDoc); 

pdispDoc->Querylnterface(IIDJHTMLDocument2,(void 

**)&pdoc); 

} 

if(pdoc) 
40 { 

CCom Variant variantList[1]; 
variantList[0].vt = VT^DISPATCH; 
pdoc->Q ue ry I nte rface( 1 1 D_l Dispatch , (void 

**)&(variantList[0].pdispVal)); 
45 CComVariant vResult; 

eBScript::ExecFunc( plWebBrowser, L"ShowBuyLitesTab", 

1 , variantList, &vResult ); 
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10 



25 



30 



} else 

retval=-1 ; 
return retval; 

} 



int IEDocUtils::SetHTML(IHTMLEIement *elem,const char *html) { 
int retval=0; 

15 // if( NULL != strstr(html,"<OBJECT") || NULL != strstr(html,"<IFRAME")) 
// return retval; 

BSTR htmlBSTR=makeBSTR(html); 
elem->put_innerHTML(htmlBSTR); 
20 ::SysFreeString(htmlBSTR); 

□ return retval; 



DOCUMENT_VECTOR *IEDocUtils::GetFrames(IWebBrowser2 *browser) {
    // Returns a newly allocated vector of smart pointers to IHTMLDocument2
    // objects: the browser's main document followed by the documents of its
    // frames.  The vector is empty when the browser has no main document.
    // The vector is created with new and handed to the caller.
    DOCUMENT_VECTOR *vec=new DOCUMENT_VECTOR;

    IHTMLDocument2 *doc=GetMainDocument(browser);
    if(doc != NULL) {
        AddDocumentAndDescendents(doc,vec);
        // The vector holds its own references; drop the one we were given.
        doc->Release();
    }
    return vec;
}



40 

DOCUMENT_VECTOR *IEDocUtils::GetAllDescendentFrames(IHTMLDocument2 *doc)
{
    // Returns a newly allocated vector of smart pointers to IHTMLDocument2
    // objects: doc itself followed by the document of every frame nested
    // under it.  The vector is created with new and handed to the caller.
    DOCUMENT_VECTOR *vec=new DOCUMENT_VECTOR;
    AddDocumentAndDescendents(doc,vec);
    return vec;
}



5 static void AddDocumentAndDescendents(IHTMLDocurrient2 

*doc,DOCUMENT_VECTOR *vec) { 

// The method's client should call doc->Release() when this method returns. 
IHTMLDocument2Ptr docSmartPtr(doc); // AddRef automatically 

called internally. 
1 0 vec->push_back(docSmartPtr); 

IHTMLFramesCollection2 *framesCollection; 
HRESULT result=doc->get_frames(&framesCollection); 
if(SUCCEEDED(result)) { 
15 long numFrames; 

framesCollection->get_length(&numFrames); 

for(long l=0;l<numFrames;l++) { 
VARIANT indexVariant; 
20 indexVariant.lVal=l; 

indexVariant.vt=VTJ4; 

VARIANT frameVariant; 

25 result=framesCollection->item(&indexVariant,&frameVariant); 

if(SUCCEEDED(result)) { 

IDispatch *dispatch=frameVariant.pdispVal; 
IHTMLWindow2 *frameWindow; 

result=dispatch->Querylnterface(IID_IHTMLWindow2,(void 



30 **) &frameWindow); 



35 



45 



if(SUCCEEDED(result)) { 

IHTMLDocument2 *frameDoc; 
result=frameWindow->get_document(&frameDoc); 

if(SUCCEEDED(result)) { 



AddDocumentAndDescendents(frameDoc,vec); // Add this child and its 
descendents. 

40 frameDoc->Release(); 

} 



frameWindow->Release(); 

} 

dispatch->Release(); 
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} 

framesCollection->Release(); 

} 

} 
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ProperNounDB.cpp 

#include <stdafx.h> 
#include "ProperNounDB.h" 
5 #include "StringTokenizer.h" 
#include <iostream> 
#include <fstream> 
#include <strstream> 
#include <assert.h> 

10 

using namespace std; 



1 5 ProperNounDB::ProperNounDB() { 
} 



20 ProperNounDB::~ProperNounDB() { 
} 



25 void ProperNounDB::AddProperNoun(const ProperNoun &properNoun) { 
// Store a copy in the database. 
properNouns.push_back(properNoun); 

} 

30 

const ProperNoun *ProperNounDB::GetProperNounByPhrase(string &phrase) const {
    // Return the first proper noun whose phrase equals the specified phrase,
    // or return NULL if no such proper noun exists in the database.
    // The returned pointer refers to an element stored in this database.
    PROPERNOUN_VECTOR::const_iterator theEnd=properNouns.end();

    for(PROPERNOUN_VECTOR::const_iterator iter=properNouns.begin();iter != theEnd;++iter) {
        const ProperNoun &thePn=*iter;

        // GetProperNounPhrase() hands back a string that must be deleted
        // here, so the comparison result is captured before freeing it.
        string *str=thePn.GetProperNounPhrase();
        const bool matches=(*str==phrase);
        delete str;

        if(matches)
            return &thePn;
    }
    return NULL;
}



5 const ProperNoun *ProperNounDB::GetProperNounByld(int pnld) const { 
// Return the proper noun matching the specified proper noun id, or 
// return NULL if no such proper noun exists in the database. 
PROPERNOUN_VECTOR::const_iterator iter=properNouns.begin(); 
PROPERNOUN_VECTOR: :const_iterator theEnd=properNouns.end(); 

10 

const ProperNoun *pn = NULL; 
while( (iter != theEnd) && (pn==NULL) ) { 
const ProperNoun &thePn=*iter; 
if(thePn.Getld()==pnld) 
1 5 pn=&thePn; 

iter++; 

} 

20 return pn; 

} 



const PROPERNOUN__VECTOR *ProperNounDB::GetAIIProperNouns() const { 
25 return &properNouns; 

} 



int ProperNounDB::PersistToFile(const char *filename) const { 
30 // Write the database to a file. It can be loaded later, 

ofstream out(filename); 

char *encodedStr=CreateStringForDB(); 

35 out « encodedStr; 

deleteQ encodedStr; 

out.flush(); 
40 out.close(); 

return 0; 

} 

45 

int ProperNounDB::LoadFromFile(const char *filename) { 
ifstream in(filename); 
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string completeDB; 
string str; 

while(getline(in,str)) { 
5 completeDB.append("\n"); 

completeDB.append(str); 

} 

in.close(); 

10 

return CreateFromString(completeDB.c_str()); 

} 



1 5 int ProperNounDB::CreateFromString(const char *encodedStr) { 
string rawData(encodedStr); 

// First, tokenize into lines containing proper nouns. Each is on 
// its own line. Lines are separated by a single newline character. 
20 StringTokenizertokenizer(rawData,"\n","",false); 

int numProperNouns=tokenizer.NumTokens(); 
for(int i=0;i<numProperNouns;i++) { 

const string *properNounLine=tokenizer.GetToken(i); 
StringTokenizerlineTokenizer(*properNounLine, , T , / , " ) false); 
25 int numColumns=lineTokenizer.NumTokens(); 

assert(numColumns > 1); 

// First column is the proper noun phrase. Subsequent columns 
30 // are proper noun ids. 

const string *properNounPhrase=lineTokenizer.GetToken(0); 
const string *properNounld=lineTokenizer.GetToken(1 ); 

int pnld=atoi(properNounld->c_str()); 
35 AddProperNoun(ProperNoun(*properNounPhrase,pnld)); 

} 

return 0; 

} 

40 

char *ProperNounDB::CreateStringForDB() const { 
ostrstream oss; 

PROPERNOUN_VECTOR::const_iterator iter=properNouns.begin(); 
45 PROPERNOUN_VECTOR::const_iteratortheEnd=properNouns.end(); 

while(iter != theEnd) { 
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const ProperNoun &pn=*iter; 

// Write the proper noun phrase. 

string *pnPhrase=pn.GetProperNounPhrase(); 

5 oss « (*pnPhrase) « "|"; 

delete pnPhrase; 

// Write the proper noun id. 
oss « pn.Getld(); 

10 

// Newline line delimiter, 
oss « "\n"; 

iter++; 

15 } 

oss « ends; 
return oss.str(); 

20 } 
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